/***************************************************************************************
Firm-embedded productivity and cross-country income differences
Alviarez, Cravino and Ramondo
Journal of Political Economy (2022)

Program: table_C1_contribution_variance.do
Date: October 2022

Description: Reproduces Table C.1: Contribution to Var sni_j(w)
*****************************************************************************************/

*-------------------------------------------------------------------------------
* Session setup.
* typeden is set before set_directories.do is included; the include is what is
* expected to provide the path globals (data, clogs) and the macros orbisaccess
* and ctryrel used further down -- NOTE(review): confirm against set_directories.do
global typeden 1
include "set_directories.do"
set memory 64g
global lf LF

* Start a fresh log for this table (close any log left open by a previous run)
capture log close
log using "${clogs}/table_C1_contribution_variance.log", replace
*-------------------------------------------------------------------------------



*Dataset identifiers used to build the pre_reg0 file name.
*They are defined outside the ORBIS-only block because the regression section
*further down builds the same file name from the locals title and yy; when they
*were defined inside the block they were empty whenever the block was skipped.
local title "naics_sales_s1_base_woparent"
local yy "2016"

*NOTE(review): the local orbisaccess is not defined in this file; it is presumably
*set by set_directories.do (brought in with include) -- confirm.
if `orbisaccess'==1 {

    *Get the data in order to calculate the K-means to form country groups
    *--------------------------------------------------------------------------
    clear all
    use year sector isocode guo_bvd sin_sales lhs2 using "${data}/pre_reg0_`title'_`yy'.dta", clear
    order year sector isocode guo_bvd sin_sales lhs2

    *Remove exact duplicate rows, then build integer ids for country, parent (guo)
    *and sector so the exported file is purely numeric
    duplicates drop
    sort year isocode guo_bvd
    egen iso=group(isocode)
    egen guo=group(guo_bvd)
    egen sss=group(sector)
    sort year sss iso guo

    *Keep a copy with both the original identifiers and the integer ids; it is
    *merged with the cluster assignment after the MATLAB step
    tempfile LCS_prereg
    save `LCS_prereg', replace

    *Export the numeric columns only, without a header row.
    *Column order in the csv: year sss guo iso sin_sales lhs2
    keep year sss iso guo sin_sales lhs2
    sort year sss iso guo
    order year sss guo iso sin_sales lhs2
    export delimited using "${data}/LCS_prereg_`title'_`yy'.csv", novarnames replace

    *************************************************************************
    ******************* RUN MATLAB PROGRAM k_means_acr **********************
    *************************************************************************
    *The csv imported in the next section is expected to be produced by that
    *MATLAB program from the file exported above.
}
else {
    *Every later step reads the tempfile LCS_prereg created above. Without it the
    *do-file used to abort later with an uninformative "invalid file specification",
    *so stop here with an explicit message instead.
    display as error "table_C1_contribution_variance.do requires ORBIS access (orbisaccess==1): the pre-regression data are not available"
    exit 601
}


*Import the csv file created in MATLAB
*-----------------------------------------------------------------------
*This is where the k-means output from MATLAB enters the Stata workflow.
*NOTE(review): the renames below rely on import delimited assigning default
*names ending in 1, 2, 3 (i.e. a csv without a header row) -- confirm.
clear all
import delimited "${data}/kmeans_prereg_naics_sales_s1_base_fromMatlab.csv"
*Each wildcard must match exactly one variable: column 1 = sector id,
*column 2 = country id, column 3 = k-means group of the country
rename *1 sector_g
rename *2 isocode_g
rename *3 isocode_kmean
tempfile isocode_kmeans
save `isocode_kmeans', replace

*Attach the cluster assignment to the pre-regression data saved earlier.
*sss and iso are the integer sector and country ids created with egen group()
*before the export, so they are renamed to match the keys of the MATLAB file.
use `LCS_prereg', clear
rename sss sector_g
rename iso isocode_g
*m:1 -- many parent-level rows per (sector, country) pair on the master side;
*the cluster file must be unique on sector_g isocode_g
merge m:1 sector_g isocode_g using `isocode_kmeans'
*Discard (sector, country) pairs that appear only in the MATLAB file
drop if _merge==2
drop _merge 
tempfile kmeans_frommatlab_forreg
save `kmeans_frommatlab_forreg', replace



*===============================================================================
*Regression 
*===============================================================================
*Remove the dummies that belong to the reference country, so that the country
*effects are measured relative to it (France in this case). A dummy is
*identified as belonging to the reference country when its variable label
*contains the value of the global ctryrel followed by a slash.
use "${data}/pre_reg0_`title'_`yy'.dta", clear

*First pass: list and count every aiso*sec* dummy, dropping the reference ones
local countwithref = 0
foreach v of varlist aiso*sec* {
    display "`v'"
    local ++countwithref
    local varlab : variable label `v'
    if strpos("`varlab'", "$ctryrel/") > 0 {
        drop `v'
    }
}
display "`countwithref'"

*Second pass: list and count the dummies that remain once the reference
*country has been removed
local countnoref = 0
foreach v of varlist aiso*sec* {
    local ++countnoref
    display "`v'"
}
display "`countnoref'"


*====================================================================================
display "Saving each predicted component (before start) for variance decomposition calculation"
*====================================================================================
*Build the estimation sample shared by all specifications below and store it
*in the tempfile temp0.
*Note: clear all discards the dataset currently in memory, so the sample is
*rebuilt here from the k-means tempfile and the pre_reg0 file on disk.
clear all
*Raise the variable limit before loading the dummy variables
set maxvar 32000
use `kmeans_frommatlab_forreg', clear 
*Bring in the regressors (variables matching dist* and lang*), the dummy sets
*and the parent/affiliate variables from the pre-regression file
merge 1:1 isocode guo_bvd sector using "${data}/pre_reg0_`title'_`yy'.dta", keepusing(dist* lang* double_cluster guoFE aiso*sec* piso*sec* parent aff) 
tab _merge
drop _merge 

*Rebuild the group identifiers on the merged sample instead of using the ones
*stored in the file
capture drop guoFE double_cluster
*Parent-by-sector id (firm fixed effect)
egen guoFE=group(guo_bvd sector)
*Country-by-sector id, used as the clustering variable
egen double_cluster=group(isocode sector)
*Country-by-sector id, used for the country effect (same grouping as double_cluster)
egen A_dummy=group(isocode sector)

*Interaction term: A(k)*firmFE
*------------------------
*Country-group (k-means) by firm id
egen kmeaniso_guo=group(isocode_kmean guoFE sector)
*Country-group (k-means) by sector id
egen A_dummy_kmean=group(isocode_kmean sector)

tempfile temp0
save `temp0', replace 


*Baseline (linear)
*-----------------------
*Absorb country-sector (by aff) and firm effects, saving each estimated effect
*(A_0, guo_0) and the residual (resid_0) as variables.
use `temp0', clear
reghdfe lhs2 dist_sec* lang_sec*, ///
    absorb(A_0=i.A_dummy#i.aff guo_0=guoFE, savefe) ///
    vce(cluster double_cluster) residuals(resid_0)
display "R-squared: `e(r2)'"   
local R2_0=`e(r2)'

*Contribution of each saved component to the variance of lhs2: the slope from
*regressing the component on lhs2, i.e. cov(component, lhs2)/var(lhs2).
*The formatted slope is stored in a global named after the component.
foreach comp in A_0 guo_0 {
    regress `comp' lhs2
    matrix b = e(b)
    global `comp' : display %-03.2fc round(el(b,1,1), 0.0001)
}

display "`R2_0'"
display "$A_0"
display "$guo_0"


*Baseline (linear -- K-means)
*-----------------------
*Same as the baseline, but the country effect is defined over the k-means
*country groups (A_dummy_kmean) instead of individual countries.
use `temp0', clear

reghdfe lhs2 dist_sec* lang_sec*, ///
    absorb(A_1=i.A_dummy_kmean#i.aff guo_1=guoFE, savefe) ///
    vce(cluster double_cluster) residuals(resid_1)
display "R-squared: `e(r2)'"   
local R2_1=`e(r2)'

*Contribution of each saved component to the variance of lhs2: the slope from
*regressing the component on lhs2, i.e. cov(component, lhs2)/var(lhs2).
*The formatted slope is stored in a global named after the component.
foreach comp in A_1 guo_1 {
    regress `comp' lhs2
    matrix b = e(b)
    global `comp' : display %-03.2fc round(el(b,1,1), 0.0001)
}

display "`R2_1'"
display "$A_1"
display "$guo_1"


*Specification: (interaction -- A(k)*firmFE)
*-----------------------
*(a) Firm effect plus the country-group-by-firm interaction (by aff), saving
*    both estimated effects (A_2, inter_2) and the residual (resid_2).
*    Note that here A_2 is the saved firm (guoFE) effect.
use `temp0', clear
reghdfe lhs2  dist_sec* lang_sec*, abs(A_2=i.guoFE inter_2=i.kmeaniso_guo#i.aff, savefe) vce(cluster double_cluster) residuals(resid_2) 
display " R-squared: `e(r2)'"          
local R2_2=`e(r2)'

*(b) Interaction effect only. kmeaniso_guo is built from guoFE, so the firm
*    effect is nested in the interaction and the fit should match (a) --
*    NOTE(review): confirm on the log.
*    The R-squared is stored under its own name: it used to overwrite R2_2,
*    so the value from (a) was lost and neither value was reported.
use `temp0', clear
reghdfe lhs2  dist_sec* lang_sec*, abs(i.kmeaniso_guo#i.aff, savefe) vce(cluster double_cluster) 
display " R-squared: `e(r2)'"          
local R2_2b=`e(r2)'

*Report the stored R-squared values, as done for the baseline specifications
display "`R2_2'"
display "`R2_2b'"


log close